package cn.chiship.framework.business.crawler;

import cn.chiship.sdk.core.id.SnowflakeIdUtil;
import cn.chiship.sdk.core.util.DateUtils;
import cn.chiship.sdk.core.util.JdbcUtil;
import cn.chiship.sdk.core.util.PrintUtil;
import cn.chiship.sdk.core.util.StringUtil;
import com.alibaba.excel.EasyExcelFactory;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;

import java.io.File;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Imports China News articles, exported to Excel by a Python crawler, into the
 * {@code content_article} table.
 *
 * <p>This is a command-line utility: {@link #main(String[])} walks a directory and feeds
 * every file it finds to {@link #read2(String)}.
 *
 * @author lijian
 */
public class ChinaNewsImport {

    /** Workbook read by the no-argument {@link #read()}. */
    private static final String DEFAULT_READ_FILE = "D:/20241020.xlsx";

    /** Directory scanned by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_IMPORT_DIR = "D:\\文章\\no";

    /** Value written to both the {@code source} and {@code author} columns. */
    private static final String SOURCE_NAME = "中国新闻网";

    /**
     * Category lookup table, one row per line, columns separated by tabs:
     * category name, category id, and a third code column that the import does not use.
     * Note that "大湾区" and "湾区" intentionally share the same id.
     */
    private static final String CATEGORY_TABLE = "理论\t1007094373136457729\t0020\n" + "国际\t1007094406795747328\t0021\n"
            + "社会\t1007094464723279872\t0022\n" + "财经\t1007094503747084288\t0023\n"
            + "大湾区\t1007094558424031232\t0024\n" + "湾区\t1007094558424031232\t0024\n"
            + "华人\t1007094585984802816\t0025\n" + "文娱\t1007094636672966656\t0026\n"
            + "体育\t1007094664628002816\t0027\n" + "健康\t1007094726229745664\t0029\n"
            + "教育\t1007094780541788160\t002a\n" + "法治\t1007094896220692480\t002b\n"
            + "生活\t1007094926298046464\t002c\n" + "图片\t1007094989078388736\t002d\n"
            + "视频\t1007095023668813824\t002e\n" + "港澳\t1007131135980331008\t0024-0000\n"
            + "湾得福\t1007131189285740544\t0024-0001\n" + "文化\t1007556181295001600\t0026-0000\n"
            + "一带一路\t1007556716647575552\t002f\n" + "科技\t1008250015805071360\t0030\n"
            + "要闻\t720234272785309696\t001d\n" + "时政\t720234311817502720\t001e\n" + "国内\t720234357917097984\t001f";

    /** Category name to category id, parsed once from {@link #CATEGORY_TABLE}. */
    private static final Map<String, String> CATEGORY_IDS = buildCategoryIds();

    /** Parameterized insert; the 13 placeholders match the 13 listed columns in order. */
    private static final String INSERT_SQL = "INSERT INTO `content_article`("
            + "`id`, `gmt_created`, `gmt_modified`, `category_id`, "
            + "`title`, `keywords`, `meta_description`, "
            + "`status`, `publish_date`, `source`, `source_url`, "
            + "`author`, `content`) VALUES ("
            + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

    JdbcUtil jdbcUtil;

    /**
     * Creates an importer bound to the local {@code chiship_common_simple} MySQL database.
     *
     * <p>NOTE(review): the connection URL and credentials are hard-coded; consider moving
     * them to external configuration before using this outside a local environment.
     */
    ChinaNewsImport() {
        jdbcUtil = new JdbcUtil("com.mysql.cj.jdbc.Driver",
                "jdbc:mysql://localhost:3306/chiship_common_simple?useUnicode=true&characterEncoding=UTF8&useSSL=false&serverTimezone=Asia/Shanghai",
                "root", "123456");
    }

    /**
     * Parses {@link #CATEGORY_TABLE} into a name-to-id map.
     *
     * @return map from category name to category id
     */
    private static Map<String, String> buildCategoryIds() {
        String[] rows = CATEGORY_TABLE.split("\n");
        // Sized so that all rows fit without a rehash.
        Map<String, String> ids = new HashMap<>(rows.length * 2);
        for (String row : rows) {
            String[] columns = row.split("\t");
            ids.put(columns[0], columns[1]);
        }
        return ids;
    }

    /**
     * Read mode 1: streams the default workbook and prints every row to the console.
     */
    public void read() {
        read(DEFAULT_READ_FILE);
    }

    /**
     * Read mode 1: streams the first sheet of the given workbook (one header row) and
     * prints every row to the console, followed by a completion message.
     *
     * @param filePath path of the Excel workbook to read
     */
    public void read(String filePath) {
        EasyExcelFactory.read(filePath, new AnalysisEventListener<Object>() {
            @Override
            public void invoke(Object row, AnalysisContext analysisContext) {
                PrintUtil.console(row);
            }

            @Override
            public void doAfterAllAnalysed(AnalysisContext analysisContext) {
                PrintUtil.console("数据采集完毕");
            }
        }).sheet(0).headRowNumber(1).doRead();
    }

    /**
     * Read mode 2: loads the first sheet of the workbook into {@code ChinaNewsExcelDto}
     * rows and inserts each row whose type maps to a known category into
     * {@code content_article}.
     *
     * <p>Rows with an unknown type are skipped. A row whose insert raises
     * {@link SQLException} is reported and counted as failed; the import continues with
     * the next row. A summary is printed at the end.
     *
     * @param filePath path of the Excel workbook to import
     */
    public void read2(String filePath) {
        List<ChinaNewsExcelDto> chinaNewsExcelDtos = EasyExcelFactory.read(filePath).sheet(0).headRowNumber(1)
                .head(ChinaNewsExcelDto.class).doReadSync();
        int successCount = 0;
        int failCount = 0;
        int skipCount = 0;
        for (ChinaNewsExcelDto chinaNewsExcelDto : chinaNewsExcelDtos) {
            String categoryId = CATEGORY_IDS.get(chinaNewsExcelDto.getType());
            if (StringUtil.isNullOrEmpty(categoryId)) {
                skipCount += 1;
                continue;
            }
            // NOTE(review): if DateUtils.dateTime cannot parse the value this line may throw
            // and abort the whole file — confirm the helper's behavior on malformed input.
            Long dateTime = DateUtils.dateTime(DateUtils.YYYY_MM_DD_HH_MM_SS, chinaNewsExcelDto.getTime()).getTime();

            // Order must match the column list in INSERT_SQL.
            List<Object> params = new ArrayList<>(13);
            params.add(SnowflakeIdUtil.generateStrId());
            params.add(dateTime);
            params.add(dateTime);
            params.add(categoryId);
            params.add(chinaNewsExcelDto.getTitle());
            params.add(chinaNewsExcelDto.getKeywords());
            params.add(chinaNewsExcelDto.getDescription());
            // Status value 2 — presumably "published"; verify against the content_article schema.
            params.add(Byte.valueOf((byte) 2));
            params.add(dateTime);
            params.add(SOURCE_NAME);
            params.add(chinaNewsExcelDto.getLink());
            params.add(SOURCE_NAME);
            params.add(chinaNewsExcelDto.getContent());

            try {
                jdbcUtil.updateByParams(INSERT_SQL, params);
                System.out.println(chinaNewsExcelDto.getTitle() + "\t插入成功");
                successCount += 1;
            } catch (SQLException e) {
                // Keep going, but surface the cause so the failure can be diagnosed.
                System.out.println("报错了" + chinaNewsExcelDto.getTitle() + "\t" + e.getMessage());
                failCount += 1;
            }
        }
        System.out.println(String.format("成功%s条，失败%s条", successCount, failCount));
        if (skipCount > 0) {
            System.out.println(String.format("跳过%s条（未匹配到分类）", skipCount));
        }
    }

    /**
     * Imports every regular file found in the import directory.
     *
     * @param args optional; {@code args[0]} overrides the default import directory
     */
    public static void main(String[] args) {
        String dir = (args != null && args.length > 0) ? args[0] : DEFAULT_IMPORT_DIR;
        // listFiles() returns null when the directory does not exist or cannot be read.
        File[] files = new File(dir).listFiles();
        if (files == null || files.length == 0) {
            System.out.println("没有可导入的文章");
            return;
        }
        System.out.println(files.length);
        // One importer (and one JdbcUtil) is shared across all files.
        ChinaNewsImport importer = new ChinaNewsImport();
        for (File file : files) {
            if (!file.isFile()) {
                continue;
            }
            importer.read2(file.getPath());
        }
    }

}
